Back

ML_LULC Train and Forecast

Libraries

Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

Load and Prepare Data

Python
# Read the workbook and keep only the three columns used below.
file_path = r"\LULC\LULC_Analysis.xlsx"  # replace with the path to your own workbook
df = pd.read_excel(file_path).loc[:, ['Year', 'Land Use Name', 'Area (sq km)']].copy()

# Reshape to one row per year and one column per land-use name, summing
# duplicate (year, name) entries; then order by year and fill missing cells
# by interpolation in both directions.
pivot_df = (
    df.pivot_table(
        index='Year',
        columns='Land Use Name',
        values='Area (sq km)',
        aggfunc='sum',
    )
    .sort_index()
    .interpolate(limit_direction='both')
)

# Min-max scale every land-use column.
# NOTE(review): the scaler is fitted on all years, including the rows that
# later fall into the validation split - confirm this is acceptable.
scaler = MinMaxScaler().fit(pivot_df)
scaled_data = scaler.transform(pivot_df)

# Build sliding-window samples for the LSTM input
def create_sequences(data, seq_len=5):
    """Slice a time-ordered array into input windows and next-step targets.

    Args:
        data: Array-like ordered along its first axis (one entry per step).
        seq_len: Number of consecutive steps in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``data[i:i + seq_len]`` and ``y[i]`` is ``data[i + seq_len]``.
        When ``data`` has ``seq_len`` or fewer steps there are no complete
        windows; both arrays are then empty but keep their trailing
        dimensions (``X.shape == (0, seq_len, ...)``, ``y.shape == (0, ...)``).
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_len
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,), which breaks callers
        # that read X.shape[1] / X.shape[2] / y.shape[1].
        return (
            np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype),
            np.empty((0,) + data.shape[1:], dtype=data.dtype),
        )
    X = np.stack([data[start:start + seq_len] for start in range(n_windows)])
    # Copy so the targets do not alias the caller's array.
    y = data[seq_len:].copy()
    return X, y

# Number of consecutive time steps in each LSTM input window.
sequence_length = 5
X, y = create_sequences(scaled_data, seq_len=sequence_length)

# Hold out the final 20% of windows for validation; shuffle=False keeps the
# samples in their original order.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

Build LSTM Model

Python
# Two stacked 64-unit LSTM layers, each followed by 20% dropout, then a Dense
# output layer with one unit per target column.
model = Sequential()
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1]))

# Mean squared error as the training loss, with mean absolute error reported alongside.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

Train Model

Python
# Fit on the training windows; the validation windows are evaluated after
# every epoch and recorded in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=8,
    verbose=1,
)

Evaluate Performance

Python
# Training and validation loss per epoch on a single axes.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(history.history['loss'], label='Train Loss', color='blue')
ax.plot(history.history['val_loss'], label='Val Loss', color='orange')
ax.set_title('Train vs Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

Predict on Validation

Python
# Predict the validation windows, then map both the predictions and the
# validation targets back from the scaled range to the original units.
y_pred_scaled = model.predict(X_val)
y_pred, y_val_actual = (
    scaler.inverse_transform(scaled) for scaled in (y_pred_scaled, y_val)
)

Actual vs Predicted Plot

Python
# One figure per land-use column: validation targets against model output.
for i, col in enumerate(pivot_df.columns):
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(y_val_actual[:, i], label="Actual")
    ax.plot(y_pred[:, i], label="Predicted")
    ax.set_title(f"Actual vs Predicted Area for: {col}")
    ax.set_xlabel("Time Steps")
    ax.set_ylabel("Area (sq km)")
    ax.legend()
    ax.grid(True)
    plt.show()

Forecasting

Python
# Roll the model forward one step per year up to 2040, feeding each
# prediction back in as the newest row of the input window.
# int() keeps range() working even if the Year index is float-typed.
future_steps = int(2040 - pivot_df.index.max())
last_sequence = scaled_data[-sequence_length:]
future_preds_scaled = []

for _ in range(future_steps):
    # verbose=0 suppresses the per-call progress output inside the loop.
    prediction = model.predict(last_sequence[np.newaxis, :, :], verbose=0)
    future_preds_scaled.append(prediction[0])
    # Slide the window: drop the oldest step, append the new prediction.
    last_sequence = np.vstack([last_sequence[1:], prediction])

if future_preds_scaled:
    future_preds = scaler.inverse_transform(np.array(future_preds_scaled))
else:
    # No steps to forecast (the data already reaches 2040). An empty list
    # would become a 1-D array that inverse_transform cannot accept, so
    # produce an empty result with one column per feature instead.
    future_preds = np.empty((0, scaled_data.shape[1]))

Combine with Existing

Python
# Label the forecast rows with the years after the last observed year
# (through 2040) and append them below the historical table.
future_years = np.arange(pivot_df.index.max() + 1, 2041)
future_df = pd.DataFrame(
    data=future_preds,
    index=future_years,
    columns=pivot_df.columns,
)
combined_df = pd.concat([pivot_df, future_df], axis=0)

Plot Historical and Forecast Areas

Python
# Historical and forecast areas for every land-use column on one chart.
# The forecast boundary is taken from the data (last observed year) rather
# than a hard-coded 2023, so the marker and title stay correct if the
# input years change.
last_observed_year = int(pivot_df.index.max())
combined_df.plot(
    figsize=(15, 5),
    title=f'LULC Area Forecast {last_observed_year + 1}-2040',
)
plt.axvline(last_observed_year, color='gray', linestyle='--', label='Forecast Starts')
plt.legend()
plt.xlabel('Year')
plt.ylabel('Area (sq km)')
plt.grid(True)
plt.tight_layout()
plt.show()
Python
# Individual forecast charts for three selected land-use classes.
# The forecast boundary is derived from the data (last observed year) rather
# than a hard-coded 2023, so the marker and titles follow the input years.
last_observed_year = int(pivot_df.index.max())
for col in ['Croplands', 'Urban and Built-Up', 'Permanent Wetlands']:
    plt.figure(figsize=(10, 5))
    plt.plot(combined_df.index, combined_df[col], label=col)
    plt.axvline(last_observed_year, color='gray', linestyle='--', label='Forecast Starts')
    plt.title(f"LULC {col} Forecast {last_observed_year + 1}–2040")
    plt.xlabel("Year")
    plt.ylabel("Area (sq km)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

Confusion Matrix

Python
def trend_class(arr):
    """Label each consecutive pair in ``arr``: 1 if rising, -1 if falling, 0 if equal."""
    directions = []
    for previous, current in zip(arr[:-1], arr[1:]):
        if current > previous:
            directions.append(1)
        elif current < previous:
            directions.append(-1)
        else:
            directions.append(0)
    return directions


# One confusion matrix per land-use column, comparing the step-to-step
# direction of the validation targets with that of the predictions.
for i, col in enumerate(pivot_df.columns):
    actual_trend = trend_class(y_val_actual[:, i])
    pred_trend = trend_class(y_pred[:, i])
    cm = confusion_matrix(
        actual_trend,
        pred_trend,
        labels=[-1, 0, 1],
    )
    disp = ConfusionMatrixDisplay(
        confusion_matrix=cm,
        display_labels=["↓", "→", "↑"],
    )
    disp.plot(cmap='Blues')
    plt.title(f'Trend Direction Confusion Matrix: {col}')
    plt.grid(False)
    plt.show()